# Load required packages: 
###Title
library("rstudioapi")     
##Set working directory
setwd(dirname(getActiveDocumentContext()$path))
getwd()
library("readxl") 
require(miceadds)
library(data.table)
library(tidyverse)
library(DescTools)
library(htmlTable)
library(stargazer)
library(xlsx)
library(estimatr)
library(tidyverse)
library(geosphere)
library(DescTools)
require(dplyr)

load("FinalFinal20062023Full1-6.RData")
load("FinalFinalSP20062023Full1-6.RData")
require(BART)

# set random seed: 
set.seed(89)

### BART analysis: 

# Vaccine Intention
table(final_finalV2$vaccine_intention)
sum(is.na(final_finalV2$vaccine_intention))

# Vaccine Intention
table(final_finalV2$vaccine_reported_combo)
sum(is.na(final_finalV2$vaccine_reported_combo))

# remove CDC
work = final_finalV2[-which(final_finalV2$cash == "CDC"),]
dim(work)
work$treat = ifelse(work$cash == "cash", 1, 0)

is.factor(work$individual_treatment)
work$Gender = as.factor(work$Gender)
work$Gender45y = as.factor(work$Gender45y)


work2 <- work %>% 
  select(vaccine_reported_combo,
         Gender, 
         Age, 
         WhatsApp,
         Education,
         Income,
         treat, 
         SNMetric
  ) %>% 
  as.data.frame(.) 

work2 = na.omit(work2)
dim(work2)

work2$WhatsApp = as.factor(work2$WhatsApp)
work2$treat = as.factor(work2$treat)

y_train1 <- (work2$vaccine_reported_combo) 

x_train1 <- work2 %>% 
  select(Gender, 
         Age, 
         WhatsApp,
         Education,
         Income,
         treat, 
         SNMetric
         ) %>% 
  as.data.frame(.) 

# Counterfactual data with inverted treatment assignment
x_test1 <- x_train1 %>% 
  mutate(treat = ifelse(treat == 1, 0, 1)) %>% 
  as.data.frame(.)

x_test1$treat = as.factor(x_test1$treat)

# Train continuous prediction model and cast onto counterfactual data
bart_results <- wbart(x.train = x_train1,
                      y.train = y_train1,
                      x.test = x_test1)


y_1 <- ifelse(x_train1$treat == 1, bart_results$yhat.train.mean, bart_results$yhat.test.mean)
y_0 <- ifelse(x_train1$treat == 0, bart_results$yhat.train.mean, bart_results$yhat.test.mean)

# Create data.frame to show ITE and all covariates
ite_results <- x_train1 %>% 
  mutate(ite = y_1 - y_0)

# Plot results
plot_data <- ite_results[order(ite_results$ite),]
plot_data$id <- 1:nrow(plot_data)


summary(plot_data$Income) # cuts on 75, 150, 225 
plot_data$Income = ifelse(plot_data$Income < 75, "Below 75", 
                          ifelse(plot_data$Income >= 75 & plot_data$Income < 150, "75-150",
                                 ifelse(plot_data$Income >= 150 & plot_data$Income < 225, "150-225",
                                        ifelse(plot_data$Income >= 225, "Above 225", NA))))

summary(plot_data$Age) # 30, 50 , 70 
plot_data$Age = ifelse(plot_data$Age < 30, "Below 30", 
                          ifelse(plot_data$Age >= 30 & plot_data$Age < 50, "30-50",
                                 ifelse(plot_data$Age >= 50 & plot_data$Age < 70, "50-70",
                                        ifelse(plot_data$Age >= 70, "Above 70", NA))))




itePlot <- ggplot(plot_data, aes(x=id, y = ite)) +
  geom_line() +
  geom_hline(yintercept= 0, linetype="dashed", color="red") +
  geom_hline(yintercept = mean(plot_data$ite), color = "blue") +
  labs(x="Individual", y = "ITE") +
  theme_minimal() +
  scale_x_continuous(limits = c(0,nrow(plot_data)))

itePlot


covarPlot1 <- ggplot(plot_data, aes(x=id, fill=Gender)) + # Change fill to covar of interest
  geom_histogram(binwidth = 60, position="stack") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name="Gender") + # Change this to rename legend
  labs(y = "Count", x = "Individual") +
  scale_x_continuous(limits = c(0, nrow(plot_data)))
covarPlot1

covarPlot2 <- ggplot(plot_data, aes(x=id, fill=WhatsApp)) + # Change fill to covar of interest
  geom_histogram(binwidth = 60, position="stack") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name="Whatsapp") + # Change this to rename legend
  labs(y = "Count", x = "Individual") +
  scale_x_continuous(limits = c(0, nrow(plot_data)))
covarPlot2

covarPlot3 <- ggplot(plot_data, aes(x=id, fill=Education)) + # Change fill to covar of interest
  geom_histogram(binwidth = 60, position="stack") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name="Education") + # Change this to rename legend
  labs(y = "Count", x = "Individual") +
  scale_x_continuous(limits = c(0, nrow(plot_data)))
covarPlot3

# for income we might need some categories: 
covarPlot4 <- ggplot(plot_data, aes(x=id, fill=Income)) + # Change fill to covar of interest
  geom_histogram(binwidth = 60, position="stack") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name="Food Spent") + # Change this to rename legend
  labs(y = "Count", x = "Individual") +
  scale_x_continuous(limits = c(0, nrow(plot_data)))
covarPlot4


# Age with some categories
covarPlot5 <- ggplot(plot_data, aes(x=id, fill=Age)) + # Change fill to covar of interest
  geom_histogram(binwidth = 60, position="stack") +
  theme(legend.position="bottom") +
  scale_fill_discrete(name="Age") + # Change this to rename legend
  labs(y = "Count", x = "Individual") +
  scale_x_continuous(limits = c(0, nrow(plot_data)))
covarPlot5

require(ggplot2)
library(ggpubr)

# two graphs into one chart 
# Combine all plots into one chart
het_plot1 <- ggarrange(itePlot, covarPlot1,
                      ncol = 1, nrow = 2, heights = c(2,2))
het_plot1
het_plot2 <- ggarrange(itePlot, covarPlot2,
                       ncol = 1, nrow = 2, heights = c(2,2))
het_plot2
het_plot3 <- ggarrange(itePlot, covarPlot3,
                       ncol = 1, nrow = 2, heights = c(2,2))
het_plot3

het_plot4 <- ggarrange(itePlot, covarPlot4,
                       ncol = 1, nrow = 2, heights = c(2,2))
het_plot4

het_plot5 <- ggarrange(itePlot, covarPlot5,
                       ncol = 1, nrow = 2, heights = c(2,2))
het_plot5


het_plot6 <- ggarrange(itePlot, covarPlot1,covarPlot2, covarPlot3,
                       ncol = 1, nrow = 4, heights = c(2,2,2,2))
het_plot6

het_plot7 <- ggarrange(itePlot,
                       covarPlot4, covarPlot5,
                       ncol = 1, nrow = 3, heights = c(2,2,2))
het_plot7


# all in one plot, exported to PDF: 
ggsave(het_plot7, 
       filename = "ExtData_Figure3.pdf", 
       device = "pdf", 
       height = 13, 
       width = 9, 
       dpi = 300)

ggsave(het_plot6, 
       filename = "ExtData_Figure4.pdf", 
       device = "pdf", 
       height = 13, 
       width = 9, 
       dpi = 300)
 





